package com.chaopei.day10.mynet;

import java.io.*;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.StandardCharsets;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import sun.net.www.protocol.https.HttpsURLConnectionImpl;

/**
 * A minimal web-crawler example: downloads a single page and saves the
 * {@code https://} site addresses found in it to a text file.
 *
 * <p>Created 2022/3/16 10:26.
 *
 * @author Corday 天涯
 */
public class MyNet {
    /** File that receives the extracted addresses, one per line. */
    private static final String OUTPUT_PATH = "c:/temp/web.txt";

    /** Connect and read timeout applied to the request, in milliseconds. */
    private static final int TIMEOUT_MILLIS = 2000;

    /**
     * Matches addresses made of {@code https://} followed by two word-character
     * labels and an alphabetic suffix, e.g. {@code https://www.chaoke.com}.
     * For a longer host such as {@code https://www.chaoke.com.cn} only the
     * first three labels are matched. Compiled once because it never changes.
     */
    private static final Pattern LINK_PATTERN =
            Pattern.compile("https://\\w+\\.\\w+\\.[A-Za-z]+");

    /**
     * Fetches the page at {@code urls} with an HTTP GET and writes every match
     * of {@link #LINK_PATTERN} to {@link #OUTPUT_PATH}, each followed by CRLF.
     *
     * <p>Failures are reported on {@code System.err} and the method returns
     * normally; nothing is thrown for a malformed URL, a non-HTTP(S) URL, or
     * an I/O error. The output file is opened (and therefore truncated) only
     * after the response stream has been obtained, so a failed request leaves
     * any existing file untouched.
     *
     * @param urls address of the page to scan; must use the http or https scheme
     */
    public static void playWeb(String urls) {
        HttpURLConnection connection = null;
        try {
            // new URL(...) throws MalformedURLException, a subclass of IOException,
            // so the single catch below covers it.
            URLConnection opened = new URL(urls).openConnection();
            if (!(opened instanceof HttpURLConnection)) {
                // e.g. file: or jar: URLs; a blind cast would throw ClassCastException.
                System.err.println("Not an HTTP(S) URL: " + urls);
                return;
            }
            connection = (HttpURLConnection) opened;

            // (1) Configure the request before connecting.
            connection.setConnectTimeout(TIMEOUT_MILLIS);
            connection.setReadTimeout(TIMEOUT_MILLIS);
            connection.setRequestMethod("GET");

            // (2) Connect, then stream the response line by line.
            connection.connect();

            // Decode as UTF-8 so results do not depend on the platform default charset.
            // NOTE(review): the charset declared by the response is not consulted.
            try (BufferedReader reader = new BufferedReader(
                         new InputStreamReader(connection.getInputStream(), StandardCharsets.UTF_8));
                 Writer writer = new FileWriter(OUTPUT_PATH)) {
                String line;
                while ((line = reader.readLine()) != null) {
                    Matcher matcher = LINK_PATTERN.matcher(line);
                    // find() advances to the next match on this line, like an iterator.
                    while (matcher.find()) {
                        writer.write(matcher.group() + "\r\n");
                    }
                }
            }
            // Reached only after both streams have been closed without error.
            System.out.println("抓取成功!");
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            if (connection != null) {
                connection.disconnect();
            }
        }
    }
}

// Manual smoke test for MyNet.
class TestMyNet {
    /**
     * Runs {@code MyNet.playWeb} once against a fixed start page.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        final String startPage = "https://www.baidu.com";
        MyNet.playWeb(startPage);
    }
}